# !pip install numpy
# !pip install pandas
# !pip install sklearn
# !pip install qiskit
Image source: https://www.embedded-robotics.com/iris-dataset-classification/
X variables: [Sepal length, Sepal width, Petal length, Petal width] (sepal = the flower's calyx, petal = the flower's petal; each measured as length and width)
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
# visualization packages
import matplotlib.pyplot as plt
import seaborn as sns
# Load the bundled Iris sample dataset (150 rows, 4 features, 3 classes)
iris = load_iris()
# print out data with variable types and its description
# print(iris)
# Description of the dataset
print(iris.DESCR)
.. _iris_dataset:
Iris plants dataset
--------------------
**Data Set Characteristics:**
:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
- sepal length in cm
- sepal width in cm
- petal length in cm
- petal width in cm
- class:
- Iris-Setosa
- Iris-Versicolour
- Iris-Virginica
:Summary Statistics:
============== ==== ==== ======= ===== ====================
Min Max Mean SD Class Correlation
============== ==== ==== ======= ===== ====================
sepal length: 4.3 7.9 5.84 0.83 0.7826
sepal width: 2.0 4.4 3.05 0.43 -0.4194
petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
============== ==== ==== ======= ===== ====================
:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988
The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.
This is perhaps the best known database to be found in the
pattern recognition literature. Fisher's paper is a classic in the field and
is referenced frequently to this day. (See Duda & Hart, for example.) The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant. One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.
.. topic:: References
- Fisher, R.A. "The use of multiple measurements in taxonomic problems"
Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
Mathematical Statistics" (John Wiley, NY, 1950).
- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
- Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
Structure and Classification Rule for Recognition in Partially Exposed
Environments". IEEE Transactions on Pattern Analysis and Machine
Intelligence, Vol. PAMI-2, No. 1, 67-71.
- Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions
on Information Theory, May 1972, 431-433.
- See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II
conceptual clustering system finds 3 classes in the data.
- Many, many more ...
# Build a DataFrame exposing feature_names (X variables) and target (y variable)
data_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)
data_iris['species'] = iris.target
# Map the integer class codes to species names
data_iris['species'] = data_iris['species'].map(
    {0: "setosa", 1: "versicolor", 2: "virginica"})
print(data_iris)
# Plot scatter plots and density distribution plots feature-wise WITH labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, hue="species", height=3)
plt.show()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
species
0 setosa
1 setosa
2 setosa
3 setosa
4 setosa
.. ...
145 virginica
146 virginica
147 virginica
148 virginica
149 virginica
[150 rows x 5 columns]
# Plot scatter plots and density distribution plots feature-wise WITHOUT any labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, height=3)
plt.show()
# Descriptive statistics of the numeric columns
data_iris.describe()
| sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | |
|---|---|---|---|---|
| count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
| mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
| std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
| min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
| 25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
| 50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
| 75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
| max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
# Count samples per species (the dataset is balanced: 50 each)
data_iris['species'].value_counts()
# pd.crosstab(index=data_iris['species'], columns="count")
setosa 50 versicolor 50 virginica 50 Name: species, dtype: int64
# Euclidean norm (distance from the origin) of each row's 4 feature values
iris_euclidean_dist = np.linalg.norm(data_iris.iloc[:, 0:4].values, axis=1)
iris_euclidean_dist
array([ 6.34507683, 5.91692488, 5.83609458, 5.7497826 , 6.32139225,
6.88621812, 5.8966092 , 6.23297682, 5.45618915, 5.98999165,
6.71863081, 6.09918027, 5.83180932, 5.35817133, 7.14982517,
7.36613874, 6.79852925, 6.34901567, 7.06470098, 6.54140658,
6.60681466, 6.48922183, 5.92958683, 6.32771681, 6.18465844,
6.04979338, 6.26737585, 6.44825558, 6.37181293, 5.91016074,
5.93717104, 6.56734345, 6.79043445, 7.06328535, 5.99249531,
6.05970296, 6.65056389, 6.2401923 , 5.48543526, 6.31347765,
6.24739946, 5.22685374, 5.59732079, 6.33798075, 6.64981203,
5.83866423, 6.56124988, 5.77927331, 6.63852393, 6.15548536,
9.12633552, 8.58487041, 9.13673902, 7.29588925, 8.5732141 ,
7.89113427, 8.67352293, 6.45445583, 8.64985549, 7.17635005,
6.5 , 7.98122798, 7.60526134, 8.3468557 , 7.37699126,
8.70746806, 7.92842986, 7.6642025 , 8.11048704, 7.35051019,
8.44570897, 7.92085854, 8.49705831, 8.28130425, 8.33966426,
8.59534758, 8.89269363, 9.04322951, 8.1798533 , 7.24568837,
7.18748913, 7.12039325, 7.58814865, 8.47702778, 7.78845299,
8.38868285, 8.87918915, 8.12588457, 7.67202711, 7.36138574,
7.60328876, 8.32646384, 7.60526134, 6.49461315, 7.61445993,
7.78267306, 7.76079893, 8.18718511, 6.5169011 , 7.67007171,
9.63483264, 8.39940474, 9.93126377, 9.09395404, 9.47259204,
10.71120908, 7.30753036, 10.22888068, 9.38189746, 10.40480658,
9.08295106, 8.94147639, 9.48156105, 8.23043134, 8.55862138,
9.19673855, 9.20543318, 11.11125555, 10.90642013, 8.2516665 ,
9.77905926, 8.19817053, 10.77125805, 8.61568337, 9.62704524,
10.06578363, 8.51821578, 8.57088093, 9.19619487, 9.85088828,
10.16956243, 11.03675677, 9.21954446, 8.70574523, 8.79147314,
10.52568288, 9.4005319 , 9.16842407, 8.44274837, 9.52837867,
9.57183368, 9.40850679, 8.39940474, 9.8275124 , 9.72213968,
9.28547252, 8.63423419, 9.07138358, 9.18966811, 8.54751426])
# Append the Euclidean distance and its square as new feature columns
data_iris['Euclid_dist'] = iris_euclidean_dist
data_iris['Euclid_dist_sq'] = iris_euclidean_dist**2
# # Function that calculates Mahalanobis distance (kept for reference, unused)
# def mahalanobis(x=None, data=None, cov=None):
#     x_mu = x - x.mean()
#     if not cov:
#         cov = np.cov(data.values.T)
#     inv_covmat = np.linalg.inv(cov)
#     left = np.dot(x_mu, inv_covmat)
#     mahal = np.dot(left, x_mu.T)
#     return mahal.diagonal()
# # Calculate the Mahalanobis distance of the data
# Mahal_dist = mahalanobis(x=data_iris.iloc[:,range(4)], data=data_iris.iloc[:,range(4)])
# Mahal_dist
# # Create new column of Mahalanobis distance
# data_iris['Mahal_dist'] = Mahal_dist
# data_iris['Mahal_dist_sq'] = Mahal_dist**2
data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq']]
# data_iris[['species', 'Euclid_dist','Euclid_dist_sq','Mahal_dist', 'Mahal_dist_sq']]
| species | Euclid_dist | Euclid_dist_sq | |
|---|---|---|---|
| 0 | setosa | 6.345077 | 40.26 |
| 1 | setosa | 5.916925 | 35.01 |
| 2 | setosa | 5.836095 | 34.06 |
| 3 | setosa | 5.749783 | 33.06 |
| 4 | setosa | 6.321392 | 39.96 |
| ... | ... | ... | ... |
| 145 | virginica | 9.285473 | 86.22 |
| 146 | virginica | 8.634234 | 74.55 |
| 147 | virginica | 9.071384 | 82.29 |
| 148 | virginica | 9.189668 | 84.45 |
| 149 | virginica | 8.547514 | 73.06 |
150 rows × 3 columns
# Pairplots again, now including the new distance columns, WITH labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, hue="species", height=3)
plt.show()
# Same pairplots WITHOUT class labels
sns.set(font_scale=1.5)
sns.pairplot(data_iris, height=3)
plt.show()
# Pairplot restricted to the distance features, colored by species
sns.pairplot(data_iris[['species', 'Euclid_dist',
                        'Euclid_dist_sq']], hue="species", height=3)
# sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq', 'Mahal_dist','Mahal_dist_sq']], hue="species", height=3)
<seaborn.axisgrid.PairGrid at 0x23a492d8c70>
# Pairplot of the distance features without class coloring
sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq']], height=3)
# sns.pairplot(data_iris[['species', 'Euclid_dist', 'Euclid_dist_sq', 'Mahal_dist','Mahal_dist_sq']], height=3)
<seaborn.axisgrid.PairGrid at 0x23a48da4730>
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
def draw_graph(G, col, pos):
    """Draw graph G with node colors ``col`` at layout ``pos``, labeling each edge with its weight."""
    plt.figure(figsize=(12, 8))
    axes = plt.axes(frameon=True)
    nx.draw_networkx(
        G,
        pos=pos,
        ax=axes,
        node_color=col,
        node_size=600,
        alpha=0.8,
        font_size=16,
    )
    weight_labels = nx.get_edge_attributes(G, 'weight')
    nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=weight_labels, font_size=16)
n = 6  # number of nodes in graph
np.random.seed(150)  # fixed seed so the random weights are reproducible
edge_weights = np.random.randint(1, 5, size=(n, n))
# element-wise product with the transpose makes the weight matrix symmetric
edge_weights = edge_weights * edge_weights.T / 2
G = nx.Graph()
G.add_nodes_from(np.arange(0, n, 1))
# connect every pair of nodes (complete graph), taking the weight from the matrix
for i in range(n):
    for j in range(n):
        if i > j:
            G.add_edge(i, j, weight=edge_weights[i, j])
colors = ['g' for node in G.nodes()]  # all nodes start green
pos = nx.spring_layout(G)
# graph G: nodes
G.nodes
NodeView((0, 1, 2, 3, 4, 5))
# graph G: edge list (complete graph on the 6 nodes)
G.edges
EdgeView([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5), (4, 5)])
# graph G: edges together with their weight attributes
G.edges.data()
EdgeDataView([(0, 1, {'weight': 1.5}), (0, 2, {'weight': 6.0}), (0, 3, {'weight': 1.0}), (0, 4, {'weight': 0.5}), (0, 5, {'weight': 3.0}), (1, 2, {'weight': 6.0}), (1, 3, {'weight': 3.0}), (1, 4, {'weight': 2.0}), (1, 5, {'weight': 2.0}), (2, 3, {'weight': 0.5}), (2, 4, {'weight': 4.0}), (2, 5, {'weight': 4.0}), (3, 4, {'weight': 2.0}), (3, 5, {'weight': 4.0}), (4, 5, {'weight': 6.0})])
# Plot of the given graph G
draw_graph(G, colors, pos)
# Build the (symmetric) weighted adjacency matrix of G; zero means no edge
w = np.zeros([n, n])
for i in range(n):
    for j in range(n):
        temp = G.get_edge_data(i, j, default=0)
        if temp != 0:
            w[i, j] = temp['weight']
w
array([[0. , 1.5, 6. , 1. , 0.5, 3. ],
[1.5, 0. , 6. , 3. , 2. , 2. ],
[6. , 6. , 0. , 0.5, 4. , 4. ],
[1. , 3. , 0.5, 0. , 2. , 4. ],
[0.5, 2. , 4. , 2. , 0. , 6. ],
[3. , 2. , 4. , 4. , 6. , 0. ]])
# Brute-force Max-Cut: evaluate all 2^n bit assignments, keep the best cut
best_cost_brute = 0
for b in range(2**n):
    # integer b as a 0/1 list, least-significant bit first
    x = [int(t) for t in reversed(list(bin(b)[2:].zfill(n)))]
    cost = 0
    for i in range(n):
        for j in range(n):
            # edge (i, j) contributes its weight when the endpoints are on opposite sides
            cost = cost + w[i, j] * x[i] * (1-x[j])
    if best_cost_brute < cost:
        best_cost_brute = cost
        xbest_brute = x
    print('case =', '%-20s' % str(x), ' cost =', '%-6s' %
          str(cost), ' try =', str(b+1))
# color the winning partition: green for side 0, cyan for side 1
colors_brute = ['g' if xbest_brute[i] == 0 else 'c' for i in range(n)]
print('\nBest case(solution) =', '%-20s' %
      str(xbest_brute), ' cost =', '%-6s' % str(best_cost_brute))
case = [0, 0, 0, 0, 0, 0] cost = 0.0 try = 1 case = [1, 0, 0, 0, 0, 0] cost = 12.0 try = 2 case = [0, 1, 0, 0, 0, 0] cost = 14.5 try = 3 case = [1, 1, 0, 0, 0, 0] cost = 23.5 try = 4 case = [0, 0, 1, 0, 0, 0] cost = 20.5 try = 5 case = [1, 0, 1, 0, 0, 0] cost = 20.5 try = 6 case = [0, 1, 1, 0, 0, 0] cost = 23.0 try = 7 case = [1, 1, 1, 0, 0, 0] cost = 20.0 try = 8 case = [0, 0, 0, 1, 0, 0] cost = 10.5 try = 9 case = [1, 0, 0, 1, 0, 0] cost = 20.5 try = 10 case = [0, 1, 0, 1, 0, 0] cost = 19.0 try = 11 case = [1, 1, 0, 1, 0, 0] cost = 26.0 try = 12 case = [0, 0, 1, 1, 0, 0] cost = 30.0 try = 13 case = [1, 0, 1, 1, 0, 0] cost = 28.0 try = 14 case = [0, 1, 1, 1, 0, 0] cost = 26.5 try = 15 case = [1, 1, 1, 1, 0, 0] cost = 21.5 try = 16 case = [0, 0, 0, 0, 1, 0] cost = 14.5 try = 17 case = [1, 0, 0, 0, 1, 0] cost = 25.5 try = 18 case = [0, 1, 0, 0, 1, 0] cost = 25.0 try = 19 case = [1, 1, 0, 0, 1, 0] cost = 33.0 try = 20 case = [0, 0, 1, 0, 1, 0] cost = 27.0 try = 21 case = [1, 0, 1, 0, 1, 0] cost = 26.0 try = 22 case = [0, 1, 1, 0, 1, 0] cost = 25.5 try = 23 case = [1, 1, 1, 0, 1, 0] cost = 21.5 try = 24 case = [0, 0, 0, 1, 1, 0] cost = 21.0 try = 25 case = [1, 0, 0, 1, 1, 0] cost = 30.0 try = 26 case = [0, 1, 0, 1, 1, 0] cost = 25.5 try = 27 case = [1, 1, 0, 1, 1, 0] cost = 31.5 try = 28 case = [0, 0, 1, 1, 1, 0] cost = 32.5 try = 29 case = [1, 0, 1, 1, 1, 0] cost = 29.5 try = 30 case = [0, 1, 1, 1, 1, 0] cost = 25.0 try = 31 case = [1, 1, 1, 1, 1, 0] cost = 19.0 try = 32 case = [0, 0, 0, 0, 0, 1] cost = 19.0 try = 33 case = [1, 0, 0, 0, 0, 1] cost = 25.0 try = 34 case = [0, 1, 0, 0, 0, 1] cost = 29.5 try = 35 case = [1, 1, 0, 0, 0, 1] cost = 32.5 try = 36 case = [0, 0, 1, 0, 0, 1] cost = 31.5 try = 37 case = [1, 0, 1, 0, 0, 1] cost = 25.5 try = 38 case = [0, 1, 1, 0, 0, 1] cost = 30.0 try = 39 case = [1, 1, 1, 0, 0, 1] cost = 21.0 try = 40 case = [0, 0, 0, 1, 0, 1] cost = 21.5 try = 41 case = [1, 0, 0, 1, 0, 1] cost = 25.5 try = 42 case = [0, 1, 0, 1, 0, 1] cost = 
26.0 try = 43 case = [1, 1, 0, 1, 0, 1] cost = 27.0 try = 44 case = [0, 0, 1, 1, 0, 1] cost = 33.0 try = 45 case = [1, 0, 1, 1, 0, 1] cost = 25.0 try = 46 case = [0, 1, 1, 1, 0, 1] cost = 25.5 try = 47 case = [1, 1, 1, 1, 0, 1] cost = 14.5 try = 48 case = [0, 0, 0, 0, 1, 1] cost = 21.5 try = 49 case = [1, 0, 0, 0, 1, 1] cost = 26.5 try = 50 case = [0, 1, 0, 0, 1, 1] cost = 28.0 try = 51 case = [1, 1, 0, 0, 1, 1] cost = 30.0 try = 52 case = [0, 0, 1, 0, 1, 1] cost = 26.0 try = 53 case = [1, 0, 1, 0, 1, 1] cost = 19.0 try = 54 case = [0, 1, 1, 0, 1, 1] cost = 20.5 try = 55 case = [1, 1, 1, 0, 1, 1] cost = 10.5 try = 56 case = [0, 0, 0, 1, 1, 1] cost = 20.0 try = 57 case = [1, 0, 0, 1, 1, 1] cost = 23.0 try = 58 case = [0, 1, 0, 1, 1, 1] cost = 20.5 try = 59 case = [1, 1, 0, 1, 1, 1] cost = 20.5 try = 60 case = [0, 0, 1, 1, 1, 1] cost = 23.5 try = 61 case = [1, 0, 1, 1, 1, 1] cost = 14.5 try = 62 case = [0, 1, 1, 1, 1, 1] cost = 12.0 try = 63 case = [1, 1, 1, 1, 1, 1] cost = 0.0 try = 64 Best case(solution) = [1, 1, 0, 0, 1, 0] cost = 33.0
draw_graph(G, colors_brute, pos)
from qiskit import QuantumCircuit, Aer
from qiskit.circuit import Parameter
def maxcut_obj(solution, graph):
    """Return the negated weighted cut value of a node assignment.

    Parameters
    ----------
    solution : str or sequence of 0/1
        One side-assignment per node; node ``i`` is on side ``solution[i]``.
    graph : networkx.Graph
        Weighted graph whose ``'weight'`` edge attributes define the cut.

    Returns
    -------
    float
        Minus the total weight of edges crossing the cut, so minimizing
        this objective (the cost Hamiltonian) maximizes the cut.
    """
    # Fix: read weights from the graph itself rather than the module-level
    # adjacency matrix `w`, so the objective always matches `graph` even if
    # `w` has been rebuilt for a different graph.
    obj = 0
    for i, j, weight in graph.edges(data='weight'):
        if solution[i] != solution[j]:
            obj -= weight
    return obj
def compute_expectation(counts, graph):
    """Average the Max-Cut objective over measured bitstrings, weighted by shot counts.

    ``counts`` maps measured bitstrings to how often they occurred; the
    returned value is the expectation of the cost, which QAOA minimizes.
    """
    total = 0
    shots = 0
    for bitstring, freq in counts.items():
        total += maxcut_obj(bitstring, graph) * freq
        shots += freq  # running total of shots observed
    return total / shots
def create_qaoa_circ(graph, theta):
    """Build the QAOA circuit for weighted Max-Cut on `graph`.

    `theta` holds the 2*p variational angles: the first p entries are the
    mixer angles (beta), the last p the cost angles (gamma).
    NOTE(review): edge weights come from the module-level adjacency matrix
    `w`, not from `graph` itself -- `w` must have been built from `graph`.
    """
    nqubits = len(graph.nodes())
    n_layers = len(theta)//2  # p: number of alternating cost/mixer layers
    beta = theta[:n_layers]
    gamma = theta[n_layers:]
    qc = QuantumCircuit(nqubits)
    qc.h(range(nqubits))  # start in the uniform superposition
    for layer_index in range(n_layers):
        # cost layer: one weighted ZZ rotation per edge
        for pair in list(graph.edges()):
            qc.rzz(2 * gamma[layer_index] * w[pair[0]]
                   [pair[1]], pair[0], pair[1])
        # mixer layer: X rotation on every qubit
        for qubit in range(nqubits):
            qc.rx(2 * beta[layer_index], qubit)
    qc.measure_all()
    return qc
def get_expectation(graph, shots=512):
    """Return a closure mapping QAOA angles `theta` to the estimated cost expectation.

    The closure builds the QAOA circuit for `theta`, runs it on the qasm
    simulator with a fixed seed, and averages the cut objective over the
    measured counts.
    """
    backend = Aer.get_backend('qasm_simulator')

    def execute_circ(theta):
        qc = create_qaoa_circ(graph, theta)
        # Bug fix: the shot count must be passed to run() as `shots=`.
        # The original passed an unknown kwarg `nshots=512` (and set a
        # `backend.shots` attribute the backend does not read), so the
        # `shots` parameter was silently ignored.
        counts = backend.run(qc, seed_simulator=10,
                             shots=shots).result().get_counts()
        return compute_expectation(counts, graph)

    return execute_circ
from scipy.optimize import minimize
expectation = get_expectation(G)
# Classically optimize the 2p QAOA angles with COBYLA, starting at pi/2
p = 1  # 32 64
res = minimize(expectation, np.ones(p*2)*np.pi/2,
               method='COBYLA', options={'maxiter': 2500})
res
fun: -24.17529296875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 31
status: 1
success: True
x: array([2.57910398, 1.5991255 ])
from qiskit.visualization import plot_histogram
# Re-run the circuit with the optimized angles and plot the measurement counts
backend = Aer.get_backend('aer_simulator')
backend.shots = 2**14
qc_res = create_qaoa_circ(G, res.x)
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(20, 5), title='512 Shots')
# Plot the Quantum Circuit of QAOA
# qc_res.draw(output='mpl', plot_barriers=True).savefig('QAOA_circuit.png', dpi=720)
qc_res.draw(output='mpl', plot_barriers=True)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1).savefig('QAOA_circuit(full).png', dpi=720)
qc_res.draw(output='mpl', plot_barriers=True, fold=-1)
# Show the raw counts dictionary as a string
str(counts)
"{'001001': 54, '100001': 23, '001010': 15, '111111': 3, '101001': 79, '111010': 82, '001000': 16, '101000': 30, '011011': 22, '011110': 27, '000001': 2, '110110': 67, '010111': 23, '101100': 1, '000011': 6, '011111': 8, '110010': 16, '101110': 2, '000101': 67, '010001': 5, '001110': 4, '111011': 21, '100101': 65, '010110': 75, '110011': 3, '000100': 16, '101101': 8, '000111': 8, '100100': 25, '111100': 5, '001101': 34, '010010': 9, '101111': 5, '000110': 4, '110001': 13, '011010': 62, '100000': 8, '000000': 6, '110111': 12, '001100': 2, '100011': 12, '100111': 13, '011000': 9, '111000': 6, '000010': 3, '110101': 16, '101011': 2, '111001': 10, '010000': 5, '001011': 1, '111110': 3, '010011': 4, '010100': 1, '011100': 5, '010101': 1}"
# Sort the measured bitstrings by ascending shot count
{k: v for k, v in sorted(counts.items(), key=lambda item: item[1])}
{'101100': 1,
'001011': 1,
'010100': 1,
'010101': 1,
'000001': 2,
'101110': 2,
'001100': 2,
'101011': 2,
'111111': 3,
'110011': 3,
'000010': 3,
'111110': 3,
'001110': 4,
'000110': 4,
'010011': 4,
'010001': 5,
'111100': 5,
'101111': 5,
'010000': 5,
'011100': 5,
'000011': 6,
'000000': 6,
'111000': 6,
'011111': 8,
'101101': 8,
'000111': 8,
'100000': 8,
'010010': 9,
'011000': 9,
'111001': 10,
'110111': 12,
'100011': 12,
'110001': 13,
'100111': 13,
'001010': 15,
'001000': 16,
'110010': 16,
'000100': 16,
'110101': 16,
'111011': 21,
'011011': 22,
'100001': 23,
'010111': 23,
'100100': 25,
'011110': 27,
'101000': 30,
'001101': 34,
'001001': 54,
'011010': 62,
'100101': 65,
'110110': 67,
'000101': 67,
'010110': 75,
'101001': 79,
'111010': 82}
# Most frequently measured bitstring -> 0/1 side assignment per node
result_col = list(map(int, list(max(counts, key=counts.get))))
result_colors = ['g' if result_col[i] == 0 else 'c' for i in range(n)]
# Result of Brute Force algorithm
draw_graph(G, colors_brute, pos)
# Result of QAOA
draw_graph(G, result_colors, pos)
print('xbest_brute :', xbest_brute)
print('QAOA :', result_col)
xbest_brute : [1, 1, 0, 0, 1, 0] QAOA : [1, 1, 1, 0, 1, 0]
from tqdm import tqdm
# Re-optimize the angles for every circuit depth p = 1..16
p = 16
res = []
for i in tqdm(range(1, p+1)):
    res.append(minimize(expectation, np.ones(i*2)*np.pi/2,
                        method='COBYLA', options={'maxiter': 2500}))
100%|██████████| 16/16 [00:26<00:00, 1.66s/it]
res[0:2]
[ fun: -24.17529296875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 31
status: 1
success: True
x: array([2.57910398, 1.5991255 ]),
fun: -24.716796875
maxcv: 0.0
message: 'Optimization terminated successfully.'
nfev: 54
status: 1
success: True
x: array([2.52461869, 1.63875349, 1.61624568, 1.52923708])]
# Approximation ratio (QAOA cost / brute-force optimum) for each depth p
approx = []
for i in range(p):
    approx.append(-res[i].fun/best_cost_brute)
x = np.arange(1, p+1, 1)
plt.figure(figsize=(8, 6))
plt.plot(x, approx, marker='o', markersize=6, c='k', linestyle='--')
plt.ylim((0.5, 1))
plt.xlim(0, p)
plt.xlabel('iteration')
plt.ylabel('Approximation')
plt.grid(True)
plt.show()
# Index of the best ratio; approx[i] corresponds to depth p = i + 1
best_p = np.argmax(approx)
# Bug fix: report the depth (index + 1), not the raw index, and fix the
# "iterationo" typo. best_p itself stays an index for later res[best_p] use.
print("The best p (iteration number) is", best_p + 1)
The best p (iteration number) is 11
res[best_p].x
array([2.69931677, 1.70145888, 1.83501131, 1.52640593, 1.70235575,
1.52533461, 1.53010659, 1.94876412, 1.40228443, 1.54544357,
1.47815615, 1.48456692, 1.58893494, 1.57102304, 1.58103879,
1.68907833, 1.40984573, 1.6220806 , 1.63468791, 1.62148146,
1.52878007, 1.70463034, 1.58593247, 1.60855217])
from qiskit.visualization import plot_histogram
# Simulate the circuit at the best depth and compare against brute force
backend = Aer.get_backend('aer_simulator')
backend.shots = 512
qc_res = create_qaoa_circ(G, res[best_p].x)
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(20, 5), title='512 Shots')
# Most frequent bitstring is taken as the QAOA solution
result_col = list(map(int, list(max(counts, key=counts.get))))
result_colors = ['g' if result_col[i] == 0 else 'c' for i in range(n)]
print('xbest_brute :', xbest_brute)
print('QAOA :', result_col)
draw_graph(G, colors_brute, pos)
draw_graph(G, result_colors, pos)
print('Best solution - Brute Force : ' +
      str(xbest_brute) + ', cost = ' + str(best_cost_brute))
print('Best solution - QAOA : ' + str(result_col) +
      ', cost = ' + str(-res[best_p].fun))
Best solution - Brute Force : [1, 1, 0, 0, 1, 0], cost = 33.0 Best solution - QAOA : [1, 1, 0, 0, 1, 0], cost = 28.9130859375
data_iris
| sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | species | Euclid_dist | Euclid_dist_sq | |
|---|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa | 6.345077 | 40.26 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa | 5.916925 | 35.01 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa | 5.836095 | 34.06 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa | 5.749783 | 33.06 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa | 6.321392 | 39.96 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica | 9.285473 | 86.22 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica | 8.634234 | 74.55 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica | 9.071384 | 82.29 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica | 9.189668 | 84.45 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica | 8.547514 | 73.06 |
150 rows × 7 columns
num_sample1 = 4
num_sample2 = 4
# Randomly sample 4 versicolor and 4 virginica rows (setosa commented out).
# NOTE(review): no random_state is set, so the sample differs on every run;
# sample_df2 reuses num_sample1 -- presumably fine since both counts are 4.
# sample_df1 = data_iris[data_iris['species'] =='setosa'].sample(num_sample1).sort_index()
sample_df2 = data_iris[data_iris['species'] =='versicolor'].sample(num_sample1).sort_index()
sample_df3 = data_iris[data_iris['species'] =='virginica'].sample(num_sample2).sort_index()
# data_iris_sample = pd.concat([sample_df1, sample_df2, sample_df3])
data_iris_sample = pd.concat([sample_df2, sample_df3])
data_iris_sample
| sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | species | Euclid_dist | Euclid_dist_sq | |
|---|---|---|---|---|---|---|---|
| 64 | 5.6 | 2.9 | 3.6 | 1.3 | versicolor | 7.376991 | 54.42 |
| 65 | 6.7 | 3.1 | 4.4 | 1.4 | versicolor | 8.707468 | 75.82 |
| 77 | 6.7 | 3.0 | 5.0 | 1.7 | versicolor | 9.043230 | 81.78 |
| 83 | 6.0 | 2.7 | 5.1 | 1.6 | versicolor | 8.477028 | 71.86 |
| 114 | 5.8 | 2.8 | 5.1 | 2.4 | virginica | 8.558621 | 73.25 |
| 125 | 7.2 | 3.2 | 6.0 | 1.8 | virginica | 10.065784 | 101.32 |
| 140 | 6.7 | 3.1 | 5.6 | 2.4 | virginica | 9.571834 | 91.62 |
| 141 | 6.9 | 3.1 | 5.1 | 2.3 | virginica | 9.408507 | 88.52 |
# Keep only the four numeric feature columns and convert to a NumPy array
data_iris_qaoa = data_iris_sample[[
    'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
data_iris_qaoa = np.array(data_iris_qaoa)
# Integer class labels (1 = versicolor, 2 = virginica) for the sampled rows
data_iris_qaoa_label = iris.target[data_iris_sample.index]
data_iris_qaoa
array([[5.6, 2.9, 3.6, 1.3],
[6.7, 3.1, 4.4, 1.4],
[6.7, 3. , 5. , 1.7],
[6. , 2.7, 5.1, 1.6],
[5.8, 2.8, 5.1, 2.4],
[7.2, 3.2, 6. , 1.8],
[6.7, 3.1, 5.6, 2.4],
[6.9, 3.1, 5.1, 2.3]])
data_iris_qaoa_label
array([1, 1, 1, 1, 2, 2, 2, 2])
len(data_iris_qaoa_label)
8
# Function to calculate the distance between given two data points
import math
def dist(a, b):
    """Return the Euclidean distance between two point lists, rounded to 4 decimal places."""
    difference = np.asarray(a) - np.asarray(b)
    return round(np.linalg.norm(difference), 4)
import random
# Build a complete graph: one node per sampled iris row, edge weight = pairwise
# Euclidean distance between the two rows' feature vectors
n = len(data_iris_qaoa_label)
data = data_iris_qaoa
label = data_iris_qaoa_label
datapoints = data.tolist()
print("Data points:", datapoints)
labels = label
print("Data labels:", labels)
G = nx.Graph()
G.add_nodes_from(np.arange(0, n, 1))
for i in range(n):
    for j in range(n):
        if i > j:
            G.add_edge(i, j, weight=dist(datapoints[i], datapoints[j]))
colors = ['g' for node in G.nodes()]  # all nodes start green
pos = nx.spring_layout(G)
Data points: [[5.6, 2.9, 3.6, 1.3], [6.7, 3.1, 4.4, 1.4], [6.7, 3.0, 5.0, 1.7], [6.0, 2.7, 5.1, 1.6], [5.8, 2.8, 5.1, 2.4], [7.2, 3.2, 6.0, 1.8], [6.7, 3.1, 5.6, 2.4], [6.9, 3.1, 5.1, 2.3]] Data labels: [1 1 1 1 2 2 2 2]
draw_graph(G, colors, pos)
# Rebuild the weighted adjacency matrix for the new iris graph; this also
# refreshes the module-level `w` used by create_qaoa_circ
w = np.zeros([n, n])
for i in range(n):
    for j in range(n):
        temp = G.get_edge_data(i, j, default=0)
        if temp != 0:
            w[i, j] = temp['weight']
w
array([[0. , 1.3784, 1.8276, 1.5937, 1.8735, 2.9428, 2.5417, 2.2316],
[1.3784, 0. , 0.6782, 1.0863, 1.546 , 1.7263, 1.562 , 1.1576],
[1.8276, 0.6782, 0. , 0.7746, 1.1619, 1.1402, 0.9274, 0.6481],
[1.5937, 1.0863, 0.7746, 0. , 0.8307, 1.5937, 1.241 , 1.2083],
[1.8735, 1.546 , 1.1619, 0.8307, 0. , 1.8138, 1.0724, 1.1446],
[2.9428, 1.7263, 1.1402, 1.5937, 1.8138, 0. , 0.8832, 1.077 ],
[2.5417, 1.562 , 0.9274, 1.241 , 1.0724, 0.8832, 0. , 0.5477],
[2.2316, 1.1576, 0.6481, 1.2083, 1.1446, 1.077 , 0.5477, 0. ]])
# Brute-force Max-Cut on the iris graph (n = 8 -> 256 assignments)
best_cost_brute = 0
for b in range(2**n):
    # integer b as a 0/1 list, least-significant bit first
    x = [int(t) for t in reversed(list(bin(b)[2:].zfill(n)))]
    cost = 0
    for i in range(n):
        for j in range(n):
            # edge (i, j) contributes its weight when the endpoints differ
            cost = cost + w[i, j] * x[i] * (1-x[j])
    if best_cost_brute < cost:
        best_cost_brute = cost
        xbest_brute = x
    print('case =', '%-30s' % str(x), ' cost =', '%-24s' %
          str(cost), 'try =', str(b+1))
# color the winning partition: green for side 0, cyan for side 1
colors_brute = ['g' if xbest_brute[i] == 0 else 'c' for i in range(n)]
print('\nBest case(solution) =', '%-30s' %
      str(xbest_brute), ' cost =', '%-24s' % str(best_cost_brute))
case = [0, 0, 0, 0, 0, 0, 0, 0] cost = 0.0 try = 1 case = [1, 0, 0, 0, 0, 0, 0, 0] cost = 14.3893 try = 2 case = [0, 1, 0, 0, 0, 0, 0, 0] cost = 9.1348 try = 3 case = [1, 1, 0, 0, 0, 0, 0, 0] cost = 20.7673 try = 4 case = [0, 0, 1, 0, 0, 0, 0, 0] cost = 7.158 try = 5 case = [1, 0, 1, 0, 0, 0, 0, 0] cost = 17.8921 try = 6 case = [0, 1, 1, 0, 0, 0, 0, 0] cost = 14.936399999999999 try = 7 case = [1, 1, 1, 0, 0, 0, 0, 0] cost = 22.913699999999995 try = 8 case = [0, 0, 0, 1, 0, 0, 0, 0] cost = 8.328299999999999 try = 9 case = [1, 0, 0, 1, 0, 0, 0, 0] cost = 19.5302 try = 10 case = [0, 1, 0, 1, 0, 0, 0, 0] cost = 15.2905 try = 11 case = [1, 1, 0, 1, 0, 0, 0, 0] cost = 23.735599999999998 try = 12 case = [0, 0, 1, 1, 0, 0, 0, 0] cost = 13.937100000000001 try = 13 case = [1, 0, 1, 1, 0, 0, 0, 0] cost = 21.4838 try = 14 case = [0, 1, 1, 1, 0, 0, 0, 0] cost = 19.5429 try = 15 case = [1, 1, 1, 1, 0, 0, 0, 0] cost = 24.3328 try = 16 case = [0, 0, 0, 0, 1, 0, 0, 0] cost = 9.442900000000002 try = 17 case = [1, 0, 0, 0, 1, 0, 0, 0] cost = 20.085199999999997 try = 18 case = [0, 1, 0, 0, 1, 0, 0, 0] cost = 15.485700000000001 try = 19 case = [1, 1, 0, 0, 1, 0, 0, 0] cost = 23.371199999999998 try = 20 case = [0, 0, 1, 0, 1, 0, 0, 0] cost = 14.277100000000003 try = 21 case = [1, 0, 1, 0, 1, 0, 0, 0] cost = 21.2642 try = 22 case = [0, 1, 1, 0, 1, 0, 0, 0] cost = 18.9635 try = 23 case = [1, 1, 1, 0, 1, 0, 0, 0] cost = 23.1938 try = 24 case = [0, 0, 0, 1, 1, 0, 0, 0] cost = 16.109799999999996 try = 25 case = [1, 0, 0, 1, 1, 0, 0, 0] cost = 23.5647 try = 26 case = [0, 1, 0, 1, 1, 0, 0, 0] cost = 19.98 try = 27 case = [1, 1, 0, 1, 1, 0, 0, 0] cost = 24.678100000000004 try = 28 case = [0, 0, 1, 1, 1, 0, 0, 0] cost = 19.3948 try = 29 case = [1, 0, 1, 1, 1, 0, 0, 0] cost = 23.1945 try = 30 case = [0, 1, 1, 1, 1, 0, 0, 0] cost = 21.908600000000003 try = 31 case = [1, 1, 1, 1, 1, 0, 0, 0] cost = 22.9515 try = 32 case = [0, 0, 0, 0, 0, 1, 0, 0] cost = 11.177000000000001 try = 33 case = [1, 0, 0, 
0, 0, 1, 0, 0] cost = 19.6807 try = 34 case = [0, 1, 0, 0, 0, 1, 0, 0] cost = 16.8592 try = 35 case = [1, 1, 0, 0, 0, 1, 0, 0] cost = 22.606099999999998 try = 36 case = [0, 0, 1, 0, 0, 1, 0, 0] cost = 16.0546 try = 37 case = [1, 0, 1, 0, 0, 1, 0, 0] cost = 20.903099999999995 try = 38 case = [0, 1, 1, 0, 0, 1, 0, 0] cost = 20.380399999999995 try = 39 case = [1, 1, 1, 0, 0, 1, 0, 0] cost = 22.472099999999998 try = 40 case = [0, 0, 0, 1, 0, 1, 0, 0] cost = 16.3179 try = 41 case = [1, 0, 0, 1, 0, 1, 0, 0] cost = 21.6342 try = 42 case = [0, 1, 0, 1, 0, 1, 0, 0] cost = 19.8275 try = 43 case = [1, 1, 0, 1, 0, 1, 0, 0] cost = 22.387 try = 44 case = [0, 0, 1, 1, 0, 1, 0, 0] cost = 19.646299999999997 try = 45 case = [1, 0, 1, 1, 0, 1, 0, 0] cost = 21.307399999999994 try = 46 case = [0, 1, 1, 1, 0, 1, 0, 0] cost = 21.799500000000002 try = 47 case = [1, 1, 1, 1, 0, 1, 0, 0] cost = 20.7038 try = 48 case = [0, 0, 0, 0, 1, 1, 0, 0] cost = 16.9923 try = 49 case = [1, 0, 0, 0, 1, 1, 0, 0] cost = 21.748999999999995 try = 50 case = [0, 1, 0, 0, 1, 1, 0, 0] cost = 19.582499999999996 try = 51 case = [1, 1, 0, 0, 1, 1, 0, 0] cost = 21.582399999999993 try = 52 case = [0, 0, 1, 0, 1, 1, 0, 0] cost = 19.546099999999996 try = 53 case = [1, 0, 1, 0, 1, 1, 0, 0] cost = 20.647599999999997 try = 54 case = [0, 1, 1, 0, 1, 1, 0, 0] cost = 20.779899999999998 try = 55 case = [1, 1, 1, 0, 1, 1, 0, 0] cost = 19.124599999999994 try = 56 case = [0, 0, 0, 1, 1, 1, 0, 0] cost = 20.471799999999995 try = 57 case = [1, 0, 0, 1, 1, 1, 0, 0] cost = 22.041099999999993 try = 58 case = [0, 1, 0, 1, 1, 1, 0, 0] cost = 20.889399999999995 try = 59 case = [1, 1, 0, 1, 1, 1, 0, 0] cost = 19.701899999999995 try = 60 case = [0, 0, 1, 1, 1, 1, 0, 0] cost = 21.476399999999998 try = 61 case = [1, 0, 1, 1, 1, 1, 0, 0] cost = 19.390499999999996 try = 62 case = [0, 1, 1, 1, 1, 1, 0, 0] cost = 20.537599999999998 try = 63 case = [1, 1, 1, 1, 1, 1, 0, 0] cost = 15.6949 try = 64 case = [0, 0, 0, 0, 0, 0, 1, 0] cost = 
8.775400000000001 try = 65 case = [1, 0, 0, 0, 0, 0, 1, 0] cost = 18.081299999999995 try = 66 case = [0, 1, 0, 0, 0, 0, 1, 0] cost = 14.786200000000003 try = 67 case = [1, 1, 0, 0, 0, 0, 1, 0] cost = 21.335299999999993 try = 68 case = [0, 0, 1, 0, 0, 0, 1, 0] cost = 14.0786 try = 69 case = [1, 0, 1, 0, 0, 0, 1, 0] cost = 19.7293 try = 70 case = [0, 1, 1, 0, 0, 0, 1, 0] cost = 18.733 try = 71 case = [1, 1, 1, 0, 0, 0, 1, 0] cost = 21.626899999999996 try = 72 case = [0, 0, 0, 1, 0, 0, 1, 0] cost = 14.6217 try = 73 case = [1, 0, 0, 1, 0, 0, 1, 0] cost = 20.740199999999998 try = 74 case = [0, 1, 0, 1, 0, 0, 1, 0] cost = 18.459899999999998 try = 75 case = [1, 1, 0, 1, 0, 0, 1, 0] cost = 21.8216 try = 76 case = [0, 0, 1, 1, 0, 0, 1, 0] cost = 18.3757 try = 77 case = [1, 0, 1, 1, 0, 0, 1, 0] cost = 20.838999999999995 try = 78 case = [0, 1, 1, 1, 0, 0, 1, 0] cost = 20.857499999999998 try = 79 case = [1, 1, 1, 1, 0, 0, 1, 0] cost = 20.563999999999997 try = 80 case = [0, 0, 0, 0, 1, 0, 1, 0] cost = 16.073500000000003 try = 81 case = [1, 0, 0, 0, 1, 0, 1, 0] cost = 21.632399999999997 try = 82 case = [0, 1, 0, 0, 1, 0, 1, 0] cost = 18.992299999999997 try = 83 case = [1, 1, 0, 0, 1, 0, 1, 0] cost = 21.794399999999996 try = 84 case = [0, 0, 1, 0, 1, 0, 1, 0] cost = 19.0529 try = 85 case = [1, 0, 1, 0, 1, 0, 1, 0] cost = 20.956599999999998 try = 86 case = [0, 1, 1, 0, 1, 0, 1, 0] cost = 20.615299999999998 try = 87 case = [1, 1, 1, 0, 1, 0, 1, 0] cost = 19.762199999999996 try = 88 case = [0, 0, 0, 1, 1, 0, 1, 0] cost = 20.258399999999995 try = 89 case = [1, 0, 0, 1, 1, 0, 1, 0] cost = 22.629899999999996 try = 90 case = [0, 1, 0, 1, 1, 0, 1, 0] cost = 21.004599999999993 try = 91 case = [1, 1, 0, 1, 1, 0, 1, 0] cost = 20.619299999999996 try = 92 case = [0, 0, 1, 1, 1, 0, 1, 0] cost = 21.688599999999997 try = 93 case = [1, 0, 1, 1, 1, 0, 1, 0] cost = 20.404899999999998 try = 94 case = [0, 1, 1, 1, 1, 0, 1, 0] cost = 21.078399999999995 try = 95 case = [1, 1, 1, 1, 1, 0, 1, 0] cost = 
17.0379 try = 96 case = [0, 0, 0, 0, 0, 1, 1, 0] cost = 18.186 try = 97 case = [1, 0, 0, 0, 0, 1, 1, 0] cost = 21.606299999999997 try = 98 case = [0, 1, 0, 0, 0, 1, 1, 0] cost = 20.7442 try = 99 case = [1, 1, 0, 0, 0, 1, 1, 0] cost = 21.4077 try = 100 case = [0, 0, 1, 0, 0, 1, 1, 0] cost = 21.208800000000004 try = 101 case = [1, 0, 1, 0, 0, 1, 1, 0] cost = 20.9739 try = 102 case = [0, 1, 1, 0, 0, 1, 1, 0] cost = 22.410599999999995 try = 103 case = [1, 1, 1, 0, 0, 1, 1, 0] cost = 19.4189 try = 104 case = [0, 0, 0, 1, 0, 1, 1, 0] cost = 20.844899999999996 try = 105 case = [1, 0, 0, 1, 0, 1, 1, 0] cost = 21.077799999999996 try = 106 case = [0, 1, 0, 1, 0, 1, 1, 0] cost = 21.2305 try = 107 case = [1, 1, 0, 1, 0, 1, 1, 0] cost = 18.7066 try = 108 case = [0, 0, 1, 1, 0, 1, 1, 0] cost = 22.3185 try = 109 case = [1, 0, 1, 1, 0, 1, 1, 0] cost = 18.8962 try = 110 case = [0, 1, 1, 1, 0, 1, 1, 0] cost = 21.347700000000003 try = 111 case = [1, 1, 1, 1, 0, 1, 1, 0] cost = 15.168600000000001 try = 112 case = [0, 0, 0, 0, 1, 1, 1, 0] cost = 21.8565 try = 113 case = [1, 0, 0, 0, 1, 1, 1, 0] cost = 21.529799999999998 try = 114 case = [0, 1, 0, 0, 1, 1, 1, 0] cost = 21.322699999999998 try = 115 case = [1, 1, 0, 0, 1, 1, 1, 0] cost = 18.239199999999997 try = 116 case = [0, 0, 1, 0, 1, 1, 1, 0] cost = 22.5555 try = 117 case = [1, 0, 1, 0, 1, 1, 1, 0] cost = 18.5736 try = 118 case = [0, 1, 1, 0, 1, 1, 1, 0] cost = 20.665299999999995 try = 119 case = [1, 1, 1, 0, 1, 1, 1, 0] cost = 13.9266 try = 120 case = [0, 0, 0, 1, 1, 1, 1, 0] cost = 22.853999999999996 try = 121 case = [1, 0, 0, 1, 1, 1, 1, 0] cost = 19.339899999999997 try = 122 case = [0, 1, 0, 1, 1, 1, 1, 0] cost = 20.147599999999997 try = 123 case = [1, 1, 0, 1, 1, 1, 1, 0] cost = 13.8767 try = 124 case = [0, 0, 1, 1, 1, 1, 1, 0] cost = 22.003799999999995 try = 125 case = [1, 0, 1, 1, 1, 1, 1, 0] cost = 14.8345 try = 126 case = [0, 1, 1, 1, 1, 1, 1, 0] cost = 17.941 try = 127 case = [1, 1, 1, 1, 1, 1, 1, 0] cost = 8.0149 try = 128 
case = [0, 0, 0, 0, 0, 0, 0, 1] cost = 8.0149 try = 129 case = [1, 0, 0, 0, 0, 0, 0, 1] cost = 17.940999999999995 try = 130 case = [0, 1, 0, 0, 0, 0, 0, 1] cost = 14.8345 try = 131 case = [1, 1, 0, 0, 0, 0, 0, 1] cost = 22.0038 try = 132 case = [0, 0, 1, 0, 0, 0, 0, 1] cost = 13.876700000000001 try = 133 case = [1, 0, 1, 0, 0, 0, 0, 1] cost = 20.147599999999997 try = 134 case = [0, 1, 1, 0, 0, 0, 0, 1] cost = 19.3399 try = 135 case = [1, 1, 1, 0, 0, 0, 0, 1] cost = 22.854 try = 136 case = [0, 0, 0, 1, 0, 0, 0, 1] cost = 13.9266 try = 137 case = [1, 0, 0, 1, 0, 0, 0, 1] cost = 20.665299999999995 try = 138 case = [0, 1, 0, 1, 0, 0, 0, 1] cost = 18.5736 try = 139 case = [1, 1, 0, 1, 0, 0, 0, 1] cost = 22.555499999999995 try = 140 case = [0, 0, 1, 1, 0, 0, 0, 1] cost = 18.239199999999997 try = 141 case = [1, 0, 1, 1, 0, 0, 0, 1] cost = 21.322699999999998 try = 142 case = [0, 1, 1, 1, 0, 0, 0, 1] cost = 21.5298 try = 143 case = [1, 1, 1, 1, 0, 0, 0, 1] cost = 21.856499999999997 try = 144 case = [0, 0, 0, 0, 1, 0, 0, 1] cost = 15.168600000000001 try = 145 case = [1, 0, 0, 0, 1, 0, 0, 1] cost = 21.347699999999996 try = 146 case = [0, 1, 0, 0, 1, 0, 0, 1] cost = 18.8962 try = 147 case = [1, 1, 0, 0, 1, 0, 0, 1] cost = 22.318499999999993 try = 148 case = [0, 0, 1, 0, 1, 0, 0, 1] cost = 18.7066 try = 149 case = [1, 0, 1, 0, 1, 0, 0, 1] cost = 21.2305 try = 150 case = [0, 1, 1, 0, 1, 0, 0, 1] cost = 21.077800000000003 try = 151 case = [1, 1, 1, 0, 1, 0, 0, 1] cost = 20.844899999999996 try = 152 case = [0, 0, 0, 1, 1, 0, 0, 1] cost = 19.418899999999994 try = 153 case = [1, 0, 0, 1, 1, 0, 0, 1] cost = 22.410599999999995 try = 154 case = [0, 1, 0, 1, 1, 0, 0, 1] cost = 20.9739 try = 155 case = [1, 1, 0, 1, 1, 0, 0, 1] cost = 21.208799999999997 try = 156 case = [0, 0, 1, 1, 1, 0, 0, 1] cost = 21.4077 try = 157 case = [1, 0, 1, 1, 1, 0, 0, 1] cost = 20.7442 try = 158 case = [0, 1, 1, 1, 1, 0, 0, 1] cost = 21.606299999999997 try = 159 case = [1, 1, 1, 1, 1, 0, 0, 1] cost = 18.186 
try = 160 case = [0, 0, 0, 0, 0, 1, 0, 1] cost = 17.0379 try = 161 case = [1, 0, 0, 0, 0, 1, 0, 1] cost = 21.0784 try = 162 case = [0, 1, 0, 0, 0, 1, 0, 1] cost = 20.4049 try = 163 case = [1, 1, 0, 0, 0, 1, 0, 1] cost = 21.688599999999997 try = 164 case = [0, 0, 1, 0, 0, 1, 0, 1] cost = 20.6193 try = 165 case = [1, 0, 1, 0, 0, 1, 0, 1] cost = 21.004599999999996 try = 166 case = [0, 1, 1, 0, 0, 1, 0, 1] cost = 22.6299 try = 167 case = [1, 1, 1, 0, 0, 1, 0, 1] cost = 20.258399999999998 try = 168 case = [0, 0, 0, 1, 0, 1, 0, 1] cost = 19.762199999999996 try = 169 case = [1, 0, 0, 1, 0, 1, 0, 1] cost = 20.615299999999994 try = 170 case = [0, 1, 0, 1, 0, 1, 0, 1] cost = 20.956599999999998 try = 171 case = [1, 1, 0, 1, 0, 1, 0, 1] cost = 19.052899999999998 try = 172 case = [0, 0, 1, 1, 0, 1, 0, 1] cost = 21.7944 try = 173 case = [1, 0, 1, 1, 0, 1, 0, 1] cost = 18.992299999999997 try = 174 case = [0, 1, 1, 1, 0, 1, 0, 1] cost = 21.632399999999997 try = 175 case = [1, 1, 1, 1, 0, 1, 0, 1] cost = 16.073500000000003 try = 176 case = [0, 0, 0, 0, 1, 1, 0, 1] cost = 20.564 try = 177 case = [1, 0, 0, 0, 1, 1, 0, 1] cost = 20.857499999999995 try = 178 case = [0, 1, 0, 0, 1, 1, 0, 1] cost = 20.839 try = 179 case = [1, 1, 0, 0, 1, 1, 0, 1] cost = 18.3757 try = 180 case = [0, 0, 1, 0, 1, 1, 0, 1] cost = 21.8216 try = 181 case = [1, 0, 1, 0, 1, 1, 0, 1] cost = 18.4599 try = 182 case = [0, 1, 1, 0, 1, 1, 0, 1] cost = 20.7402 try = 183 case = [1, 1, 1, 0, 1, 1, 0, 1] cost = 14.621700000000002 try = 184 case = [0, 0, 0, 1, 1, 1, 0, 1] cost = 21.626899999999996 try = 185 case = [1, 0, 0, 1, 1, 1, 0, 1] cost = 18.732999999999993 try = 186 case = [0, 1, 0, 1, 1, 1, 0, 1] cost = 19.7293 try = 187 case = [1, 1, 0, 1, 1, 1, 0, 1] cost = 14.0786 try = 188 case = [0, 0, 1, 1, 1, 1, 0, 1] cost = 21.335299999999993 try = 189 case = [1, 0, 1, 1, 1, 1, 0, 1] cost = 14.786200000000001 try = 190 case = [0, 1, 1, 1, 1, 1, 0, 1] cost = 18.0813 try = 191 case = [1, 1, 1, 1, 1, 1, 0, 1] cost = 
8.775400000000001 try = 192 case = [0, 0, 0, 0, 0, 0, 1, 1] cost = 15.6949 try = 193 case = [1, 0, 0, 0, 0, 0, 1, 1] cost = 20.537599999999998 try = 194 case = [0, 1, 0, 0, 0, 0, 1, 1] cost = 19.390500000000003 try = 195 case = [1, 1, 0, 0, 0, 0, 1, 1] cost = 21.476399999999998 try = 196 case = [0, 0, 1, 0, 0, 0, 1, 1] cost = 19.701900000000002 try = 197 case = [1, 0, 1, 0, 0, 0, 1, 1] cost = 20.889399999999995 try = 198 case = [0, 1, 1, 0, 0, 0, 1, 1] cost = 22.0411 try = 199 case = [1, 1, 1, 0, 0, 0, 1, 1] cost = 20.471799999999995 try = 200 case = [0, 0, 0, 1, 0, 0, 1, 1] cost = 19.1246 try = 201 case = [1, 0, 0, 1, 0, 0, 1, 1] cost = 20.779899999999998 try = 202 case = [0, 1, 0, 1, 0, 0, 1, 1] cost = 20.647600000000004 try = 203 case = [1, 1, 0, 1, 0, 0, 1, 1] cost = 19.546100000000003 try = 204 case = [0, 0, 1, 1, 0, 0, 1, 1] cost = 21.5824 try = 205 case = [1, 0, 1, 1, 0, 0, 1, 1] cost = 19.582499999999996 try = 206 case = [0, 1, 1, 1, 0, 0, 1, 1] cost = 21.749000000000002 try = 207 case = [1, 1, 1, 1, 0, 0, 1, 1] cost = 16.9923 try = 208 case = [0, 0, 0, 0, 1, 0, 1, 1] cost = 20.7038 try = 209 case = [1, 0, 0, 0, 1, 0, 1, 1] cost = 21.799499999999995 try = 210 case = [0, 1, 0, 0, 1, 0, 1, 1] cost = 21.3074 try = 211 case = [1, 1, 0, 0, 1, 0, 1, 1] cost = 19.646300000000004 try = 212 case = [0, 0, 1, 0, 1, 0, 1, 1] cost = 22.387 try = 213 case = [1, 0, 1, 0, 1, 0, 1, 1] cost = 19.8275 try = 214 case = [0, 1, 1, 0, 1, 0, 1, 1] cost = 21.6342 try = 215 case = [1, 1, 1, 0, 1, 0, 1, 1] cost = 16.3179 try = 216 case = [0, 0, 0, 1, 1, 0, 1, 1] cost = 22.472099999999998 try = 217 case = [1, 0, 0, 1, 1, 0, 1, 1] cost = 20.380399999999995 try = 218 case = [0, 1, 0, 1, 1, 0, 1, 1] cost = 20.903099999999995 try = 219 case = [1, 1, 0, 1, 1, 0, 1, 1] cost = 16.0546 try = 220 case = [0, 0, 1, 1, 1, 0, 1, 1] cost = 22.606099999999998 try = 221 case = [1, 0, 1, 1, 1, 0, 1, 1] cost = 16.8592 try = 222 case = [0, 1, 1, 1, 1, 0, 1, 1] cost = 19.6807 try = 223 case = [1, 1, 1, 
1, 1, 0, 1, 1] cost = 11.177000000000001 try = 224 case = [0, 0, 0, 0, 0, 1, 1, 1] cost = 22.951500000000003 try = 225 case = [1, 0, 0, 0, 0, 1, 1, 1] cost = 21.9086 try = 226 case = [0, 1, 0, 0, 0, 1, 1, 1] cost = 23.1945 try = 227 case = [1, 1, 0, 0, 0, 1, 1, 1] cost = 19.3948 try = 228 case = [0, 0, 1, 0, 0, 1, 1, 1] cost = 24.6781 try = 229 case = [1, 0, 1, 0, 0, 1, 1, 1] cost = 19.98 try = 230 case = [0, 1, 1, 0, 0, 1, 1, 1] cost = 23.564700000000006 try = 231 case = [1, 1, 1, 0, 0, 1, 1, 1] cost = 16.1098 try = 232 case = [0, 0, 0, 1, 0, 1, 1, 1] cost = 23.193799999999996 try = 233 case = [1, 0, 0, 1, 0, 1, 1, 1] cost = 18.963499999999996 try = 234 case = [0, 1, 0, 1, 0, 1, 1, 1] cost = 21.2642 try = 235 case = [1, 1, 0, 1, 0, 1, 1, 1] cost = 14.2771 try = 236 case = [0, 0, 1, 1, 0, 1, 1, 1] cost = 23.3712 try = 237 case = [1, 0, 1, 1, 0, 1, 1, 1] cost = 15.485700000000001 try = 238 case = [0, 1, 1, 1, 0, 1, 1, 1] cost = 20.085200000000004 try = 239 case = [1, 1, 1, 1, 0, 1, 1, 1] cost = 9.442900000000002 try = 240 case = [0, 0, 0, 0, 1, 1, 1, 1] cost = 24.3328 try = 241 case = [1, 0, 0, 0, 1, 1, 1, 1] cost = 19.5429 try = 242 case = [0, 1, 0, 0, 1, 1, 1, 1] cost = 21.483800000000002 try = 243 case = [1, 1, 0, 0, 1, 1, 1, 1] cost = 13.9371 try = 244 case = [0, 0, 1, 0, 1, 1, 1, 1] cost = 23.7356 try = 245 case = [1, 0, 1, 0, 1, 1, 1, 1] cost = 15.2905 try = 246 case = [0, 1, 1, 0, 1, 1, 1, 1] cost = 19.530200000000004 try = 247 case = [1, 1, 1, 0, 1, 1, 1, 1] cost = 8.328299999999999 try = 248 case = [0, 0, 0, 1, 1, 1, 1, 1] cost = 22.9137 try = 249 case = [1, 0, 0, 1, 1, 1, 1, 1] cost = 14.9364 try = 250 case = [0, 1, 0, 1, 1, 1, 1, 1] cost = 17.8921 try = 251 case = [1, 1, 0, 1, 1, 1, 1, 1] cost = 7.158 try = 252 case = [0, 0, 1, 1, 1, 1, 1, 1] cost = 20.7673 try = 253 case = [1, 0, 1, 1, 1, 1, 1, 1] cost = 9.1348 try = 254 case = [0, 1, 1, 1, 1, 1, 1, 1] cost = 14.3893 try = 255 case = [1, 1, 1, 1, 1, 1, 1, 1] cost = 0.0 try = 256 Best case(solution) = [1, 
1, 0, 1, 1, 0, 0, 0] cost = 24.678100000000004
draw_graph(G, colors_brute, pos)
# Sweep the QAOA circuit depth p from 1 to 64, classically optimizing the
# 2*p variational angles (p gammas + p betas) for each depth with COBYLA.
# NOTE(review): `get_expectation(G)` is defined earlier in the file; it is
# assumed to return the cost callable to minimize — confirm against that cell.
from scipy.optimize import minimize
from tqdm import tqdm
expectation = get_expectation(G)
p = 64  # maximum circuit depth tried
res = []  # res[i] holds the scipy OptimizeResult for depth i + 1
for i in tqdm(range(1, p+1)):
    # Start all 2*i angles at pi/2; COBYLA is derivative-free, which suits a
    # shot-based objective.
    res.append(minimize(expectation, np.ones(i*2)*np.pi/2,
                        method='COBYLA', options={'maxiter': 2500}))
100%|██████████| 64/64 [34:34<00:00, 32.41s/it]
# Approximation ratio per depth: the optimizer minimizes -<cost>, so
# -res[i].fun is the achieved cut value; dividing by the brute-force optimum
# `best_cost_brute` (computed earlier in the file) normalizes it to (0, 1].
approx = []
for i in range(p):
    approx.append(-res[i].fun/best_cost_brute)
x = np.arange(1, p+1, 1)
plt.plot(x, approx, marker='+', c='k', linestyle='--')
plt.ylim((0.5, 1))
plt.xlim(0, p+1)
plt.xlabel('p')
plt.ylabel('Approximation')
plt.grid(True)
plt.show()
# `approx[i]` holds the approximation ratio for depth p = i + 1, so np.argmax
# returns the 0-based index needed by `res[best_p]` below, while the actual
# depth is best_p + 1.  The original printed the raw index (off by one) and
# misspelled "iteration".
best_p = np.argmax(approx)
print("The best p (iteration number) is", best_p + 1)
The best p(iterationo number) is 26
res[best_p].x
array([1.59207929, 1.60797685, 1.60076399, 1.55435182, 1.5975448 ,
1.50995373, 1.55019809, 1.54504547, 1.54535408, 1.56693889,
1.55799298, 1.55950196, 2.57783127, 1.56654407, 2.58106201,
1.56654646, 1.56652705, 1.65367639, 1.4913896 , 1.80166326,
1.73297531, 1.56926412, 1.56568881, 1.60552461, 1.45015229,
1.59673176, 1.56932081, 1.55905032, 1.57378558, 1.55684224,
1.6469045 , 1.48523703, 1.56328948, 1.57129656, 1.59436766,
1.53907054, 1.65164999, 1.50065551, 1.6881493 , 1.44951417,
1.62342295, 1.56851081, 1.54562928, 1.56407031, 1.54177909,
1.60015493, 1.61734721, 1.58639236, 1.56611347, 1.57760546,
1.58537892, 1.5417166 , 1.5670209 , 1.56859147])
# Run the best-depth QAOA circuit on the Aer simulator and plot the
# measurement histogram.  NOTE(review): `Aer` and `create_qaoa_circ` are
# imported/defined earlier in the file (not visible in this chunk).
from qiskit.visualization import plot_histogram
backend = Aer.get_backend('aer_simulator')
backend.shots = 512
qc_res = create_qaoa_circ(G, res[best_p].x)
# Fixed simulator seed for reproducible counts.
counts = backend.run(qc_res, seed_simulator=10).result().get_counts()
plot_histogram(counts, figsize=(32, 5), title='512 Shots')
# # Plot the Quantum Circuit of QAOA
# qc_res.draw(output='mpl', plot_barriers=True).savefig('QAOA_circuit1.png', dpi=480)
# # qc_res.draw(output='mpl', plot_barriers=True)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1).savefig('QAOA_circuit(full).png', dpi=720)
# qc_res.draw(output='mpl', plot_barriers=True, fold=-1)
xbest_qaoa = list(map(int, list(max(counts, key=counts.get))))
colors_qaoa = ['g' if xbest_qaoa[i] == 0 else 'c' for i in range(n)]
print('xbest_brute :', xbest_brute)
print('QAOA :', xbest_qaoa)
xbest_brute : [1, 1, 0, 1, 1, 0, 0, 0] QAOA : [0, 0, 1, 1, 1, 0, 1, 1]
draw_graph(G, colors_brute, pos)
draw_graph(G, colors_qaoa, pos)
print('Best solution - Brute Force : ' +
str(xbest_brute) + ', cost = ' + str(best_cost_brute))
print('Best solution - QAOA : ' + str(xbest_qaoa) +
', cost = ' + str(-res[best_p].fun))
Best solution - Brute Force : [1, 1, 0, 1, 1, 0, 0, 0], cost = 24.678100000000004 Best solution - QAOA : [0, 0, 1, 1, 1, 0, 1, 1], cost = 20.9053478515625
# Visualising the clusters
# Scatter the two clusters produced by the brute-force Max-Cut bitstring on
# the first two features (sepal length vs. sepal width).
x = data_iris_qaoa
y = np.array(xbest_brute)
plt.figure(figsize=(12, 8))
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100, c='orange', label='Cluster B')
plt.title('Clustering using Brute-Force')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Visualising the clusters
# Same scatter as the brute-force plot, but using the bitstring measured most
# often from the QAOA circuit as the cluster assignment.
x = data_iris_qaoa
y = np.array(xbest_qaoa)
plt.figure(figsize=(12, 8))
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100, c='orange', label='Cluster B')
plt.title('Clustering using QAOA')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Visualising the clusters
# Ground-truth view: color the sample points by their iris species labels.
# Only labels 1 and 2 are plotted; presumably the sample contains only
# versicolor (1) and virginica (2) — confirm against the sampling cell.
x = data_iris_qaoa
y = np.array(data_iris_qaoa_label)
plt.figure(figsize=(12, 8))
# plt.scatter(x[y == 0, 0], x[y == 0, 1], s=100, c='purple', label='Cluster A(setosa)')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=100, c='purple', label='Cluster A(versicolor)')
plt.scatter(x[y == 2, 0], x[y == 2, 1], s=100, c='orange', label='Cluster B(virginica)')
plt.title('Clustering with True labels')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Plotting the centroids of the clusters
# plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='red', label='Centroids')
# plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
import os
# Avoid the sklearn KMeans memory-leak warning on Windows with MKL by
# limiting OpenMP to a single thread (see the warning captured below).
os.environ["OMP_NUM_THREADS"] = '1'
# Finding the optimum number of clusters for k-means classification
from sklearn.cluster import KMeans
# Use the four numeric iris features of the sampled rows.
x = data_iris_sample.iloc[:, 0:4]
x
| sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | |
|---|---|---|---|---|
| 64 | 5.6 | 2.9 | 3.6 | 1.3 |
| 65 | 6.7 | 3.1 | 4.4 | 1.4 |
| 77 | 6.7 | 3.0 | 5.0 | 1.7 |
| 83 | 6.0 | 2.7 | 5.1 | 1.6 |
| 114 | 5.8 | 2.8 | 5.1 | 2.4 |
| 125 | 7.2 | 3.2 | 6.0 | 1.8 |
| 140 | 6.7 | 3.1 | 5.6 | 2.4 |
| 141 | 6.9 | 3.1 | 5.1 | 2.3 |
# Finding the optimum number of clusters for k-means classification
# Elbow method: fit k-means for every k below the sample size and record the
# within-cluster sum of squares (inertia) so the "elbow" can be read off.
wcss = []
max_num_cluster = len(data_iris_sample)
for i in range(1, max_num_cluster):
    kmeans = KMeans(n_clusters=i, init='k-means++',
                    max_iter=300, n_init=10, random_state=0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, max_num_cluster), wcss)
plt.xticks(np.arange(1, max_num_cluster, step=1))  # Set label locations.
plt.title('The elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Within Cluster Sum of Squares')  # within cluster sum of squares
plt.show()
C:\Users\user1\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1036: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1. warnings.warn(
# Fit k-means with 2 clusters (chosen from the elbow plot above) and get the
# per-sample cluster assignments.
num_cluster = 2
kmeans = KMeans(n_clusters=num_cluster, init='k-means++',
                max_iter=300, n_init=10, random_state=0)
y_kmeans = kmeans.fit_predict(x)
x.iloc[0:10, :]
y_kmeans
array([0, 1, 1, 1, 1, 1, 1, 1])
y_kmeans
array([0, 1, 1, 1, 1, 1, 1, 1])
# Visualising the clusters
# Scatter the two k-means clusters on sepal length vs. sepal width, and mark
# the fitted centroids with red triangles.
plt.figure(figsize=(12, 8))
plt.scatter(x.iloc[y_kmeans == 0, 0], x.iloc[y_kmeans ==
            0, 1], s=100, c='purple', label='Cluster A')
plt.scatter(x.iloc[y_kmeans == 1, 0], x.iloc[y_kmeans ==
            1, 1], s=100, c='orange', label='Cluster B')
# plt.scatter(x.iloc[y_kmeans == 2, 0], x.iloc[y_kmeans == 2, 1], s=100, c='green', label='Cluster C')
plt.title('Clustering with K-Means')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
# Plotting the centroids of the clusters
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[
            :, 1], s=100, marker="v", c='red', label='Centroids')
plt.legend(title="Species", loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
np.random.seed(100)
num_data = 50
x11 = np.linspace(0.3, 0.7, 20)
x12 = np.linspace(1.3, 1.8, 15)
x13 = np.linspace(2.4, 3, 15)
x1 = np.concatenate((x11, x12, x13), axis=None)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
plt.scatter(x1, x2, color='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
X = np.stack([x1, x2], axis=1)
init = np.array([[2., 4.], [1., 5.], [2.5, 6.]])
kmeans = KMeans(n_clusters=3, init=init)
kmeans.fit(X)
labels = kmeans.labels_
def get_silhouette_results(X, labels):
    """Compute per-sample silhouette values for the clustering `labels` of X.

    For each sample i: a(i) is the mean distance to the other members of its
    own cluster, b(i) is the smallest mean distance to the members of any
    other cluster, and s(i) = (b(i) - a(i)) / max(a(i), b(i)).  A sample that
    is alone in its cluster gets s(i) = 0.  The returned coefficient is the
    largest per-cluster mean of the s(i) values (not the global mean).

    Returns:
        (silhouette_coef, silhouette_values) — a float and an np.ndarray
        aligned with `labels`.
    """
    def _distance_sum(point, cluster):
        # Sum of Euclidean distances from `point` to every row of `cluster`.
        return np.sum([np.linalg.norm(point - other) for other in cluster])

    uniq_labels = np.unique(labels)
    sil_values = []
    for idx in range(len(labels)):
        point = X[idx]
        own_label = labels[idx]
        own_member_idx = np.where(labels == own_label)[0]
        if len(own_member_idx) == 1:
            # Singleton cluster: silhouette is defined as 0.
            sil_values.append(0)
            continue
        own_cluster = X[own_member_idx]
        # a(i): the self-distance in the sum is 0, so divide by (size - 1).
        a_val = _distance_sum(point, own_cluster) / (own_cluster.shape[0] - 1)
        # b(i): smallest mean distance to any *other* cluster.
        b_candidates = []
        for other_label in uniq_labels:
            if other_label == own_label:
                continue
            other_cluster = X[np.where(labels == other_label)[0]]
            b_candidates.append(
                _distance_sum(point, other_cluster) / other_cluster.shape[0])
        b_val = min(b_candidates)
        sil_values.append((b_val - a_val) / max(a_val, b_val))
    # Per-cluster means of s(i); report the best cluster's mean.
    per_cluster_means = [
        np.mean([s for s, l in zip(sil_values, labels) if l == ul])
        for ul in uniq_labels
    ]
    return (max(per_cluster_means), np.array(sil_values))
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.7434423527756951
import seaborn as sns
# 각 클러스터별로 Silhouette(실루엣) 값을 정렬한다.
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
labels_idx = np.where(labels == ul)[0]
target_svl = silhouette_val_list[labels_idx]
sorted_cluster_svl += sorted(target_svl)
rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
sorted_cluster_svl, color=color_labels)
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = np.array(xbest_brute)
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.41383262040548235
import seaborn as sns
# Sort the silhouette values within each cluster so the horizontal bar chart
# shows one contiguous ascending band per cluster (brute-force labelling).
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
    labels_idx = np.where(labels == ul)[0]
    target_svl = silhouette_val_list[labels_idx]
    sorted_cluster_svl += sorted(target_svl)
    rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
         sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of Brute Force')  # typo fix: was 'Silhoutte'
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
            color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
         'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = np.array(xbest_qaoa)
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.359418702219478
import seaborn as sns
# Sort the silhouette values within each cluster so the horizontal bar chart
# shows one contiguous ascending band per cluster (QAOA labelling).
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
    labels_idx = np.where(labels == ul)[0]
    target_svl = silhouette_val_list[labels_idx]
    sorted_cluster_svl += sorted(target_svl)
    rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
         sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of QAOA')  # typo fix: was 'Silhoutte'
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
            color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
         'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
X = data_iris_qaoa
labels = y_kmeans
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.41305326815119164
type(xbest_brute)
list
import seaborn as sns
# Sort the silhouette values within each cluster so the horizontal bar chart
# shows one contiguous ascending band per cluster (k-means labelling).
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
    labels_idx = np.where(labels == ul)[0]
    target_svl = silhouette_val_list[labels_idx]
    sorted_cluster_svl += sorted(target_svl)
    rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
         sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of K-Means')  # typo fix: was 'Silhoutte'
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
            color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
         'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
data_iris_qaoa_label-1
array([0, 0, 0, 0, 1, 1, 1, 1])
data_iris_qaoa_label
# data_iris_qaoa_label2 = np.where(data_iris_qaoa_label > 2, 1, data_iris_qaoa_label)
data_iris_qaoa_label2 = data_iris_qaoa_label-1
data_iris_qaoa_label2
array([0, 0, 0, 0, 1, 1, 1, 1])
X = data_iris_qaoa
labels = data_iris_qaoa_label2
silhouette_coef, silhouette_val_list = get_silhouette_results(X, labels)
print(silhouette_coef)
0.27278851009414645
import seaborn as sns
# Sort the silhouette values within each cluster so the horizontal bar chart
# shows one contiguous ascending band per cluster (true species labelling).
uniq_labels = np.unique(labels)
sorted_cluster_svl = []
rearr_labels = []
for ul in uniq_labels:
    labels_idx = np.where(labels == ul)[0]
    target_svl = silhouette_val_list[labels_idx]
    sorted_cluster_svl += sorted(target_svl)
    rearr_labels += [ul]*len(target_svl)
colors = sns.color_palette('hls', len(uniq_labels))
color_labels = [colors[i] for i in rearr_labels]
fig = plt.figure(figsize=(6, 10))
fig.set_facecolor('white')
plt.barh(range(len(sorted_cluster_svl)),
         sorted_cluster_svl, color=color_labels)
plt.title('Silhouette Score of True Label')  # typo fix: was 'Silhoutte'
plt.ylabel('Data Index')
plt.xlabel('Silhouette Value')
plt.axvline(x=np.mean(sorted_cluster_svl),
            color='black', label='avg. Silhouette score')
plt.text(np.mean(sorted_cluster_svl)+0.02, -1,
         'Avg. \nSilhouette score='+str(round(np.mean(sorted_cluster_svl), 4)))
plt.show()
import numpy as np
import random
import matplotlib.pyplot as plt
from itertools import combinations
import numpy as np
import random
import matplotlib.pyplot as plt
from itertools import combinations
np.random.seed(100)
num_data = 20
x1 = np.linspace(0.3, 0.7, num_data)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
X = np.stack([x1, x2], axis=1)
X
array([[0.3 , 2.57511726],
[0.32105263, 3.65291915],
[0.34210526, 4.0896758 ],
[0.36315789, 3.41851882],
[0.38421053, 4.06697618],
[0.40526316, 3.86500416],
[0.42631579, 3.75006352],
[0.44736842, 3.13603097],
[0.46842105, 3.60788366],
[0.48947368, 3.86171125],
[0.51052632, 3.53677598],
[0.53157895, 4.01495017],
[0.55263158, 3.53714984],
[0.57368421, 4.26894985],
[0.59473684, 4.22846567],
[0.61578947, 3.87147864],
[0.63684211, 3.68962297],
[0.65789474, 4.50170845],
[0.67894737, 3.79935324],
[0.7 , 3.49084088]])
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
plt.scatter(x1, x2, color='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
def complete_diameter_distance(X):
    """Complete diameter: the largest pairwise Euclidean distance within X."""
    dists = [np.linalg.norm(X[p, :] - X[q, :])
             for p, q in combinations(range(X.shape[0]), 2)]
    return np.max(dists)
complete_diameter_distance(X)
1.9595515390777758
def average_diameter_distance(X):
    """Average diameter: the mean pairwise Euclidean distance within X."""
    dists = [np.linalg.norm(X[p, :] - X[q, :])
             for p, q in combinations(range(X.shape[0]), 2)]
    return np.mean(dists)
average_diameter_distance(X)
0.5159584769337329
def centroid_diameter_distance(X):
    """Centroid diameter: twice the mean distance from X's rows to its centroid."""
    centroid = np.mean(X, axis=0)
    return 2 * np.mean([np.linalg.norm(row - centroid) for row in X])
centroid_diameter_distance(X)
0.6874635793987067
np.random.seed(100)
x11 = np.linspace(0.3, 0.7, 20)
label1 = [0]*len(x11)
x12 = np.linspace(1.3, 1.8, 15)
label2 = [1]*len(x12)
error = np.random.normal(1, 0.5, 35)
x1 = np.concatenate((x11, x12), axis=None)
x2 = 1.5*x1+2+error
labels = label1+label2
X = np.stack((x1, x2), axis=1)
labels = np.array(labels)
X1 = X[np.where(labels == 0)[0], :]
X2 = X[np.where(labels == 1)[0], :]
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
for i, x in enumerate(X):
if labels[i] == 0:
plt.scatter(x[0], x[1], color='blue')
else:
plt.scatter(x[0], x[1], color='red')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
def single_linkage_distance(X1, X2):
    """Single linkage: the smallest pointwise distance between X1 and X2."""
    return np.min([np.linalg.norm(p - q) for p in X1 for q in X2])
single_linkage_distance(X1, X2)
0.7724228550378145
def complete_linkage_distance(X1, X2):
    """Complete linkage: the largest pointwise distance between X1 and X2."""
    return np.max([np.linalg.norm(p - q) for p in X1 for q in X2])
complete_linkage_distance(X1, X2)
3.807983171195838
def average_linkage_distance(X1, X2):
    """Average linkage: the mean pointwise distance between X1 and X2."""
    return np.mean([np.linalg.norm(p - q) for p in X1 for q in X2])
average_linkage_distance(X1, X2)
2.0502961616379003
def centroid_linkage_distance(X1, X2):
    """Centroid linkage: the distance between the two clusters' centroids."""
    return np.linalg.norm(np.mean(X1, axis=0) - np.mean(X2, axis=0))
centroid_linkage_distance(X1, X2)
2.023846293346597
def average_of_centroids_linkage_distance(X1, X2):
    """Mean distance from each cluster's points to the *other* cluster's centroid."""
    c1 = np.mean(X1, axis=0)
    c2 = np.mean(X2, axis=0)
    cross = [np.linalg.norm(p - c2) for p in X1]
    cross += [np.linalg.norm(q - c1) for q in X2]
    return np.mean(cross)
average_of_centroids_linkage_distance(X1, X2)
2.035733790732974
np.random.seed(100)
num_data = 50
x11 = np.linspace(0.3, 0.7, 20)
label1 = [0]*len(x11)
x12 = np.linspace(1.3, 1.8, 15)
label2 = [1]*len(x12)
x13 = np.linspace(2.4, 3, 15)
label3 = [2]*len(x13)
x1 = np.concatenate((x11, x12, x13), axis=None)
error = np.random.normal(1, 0.5, num_data)
x2 = 1.5*x1+2+error
X = np.stack((x1, x2), axis=1)
labels = np.array(label1+label2+label3)
fig = plt.figure(figsize=(7, 7))
fig.set_facecolor('white')
for i, x in enumerate(X):
if labels[i] == 0:
plt.scatter(x[0], x[1], color='blue')
elif labels[i] == 1:
plt.scatter(x[0], x[1], color='red')
else:
plt.scatter(x[0], x[1], color='green')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()
def Dunn_index(X, labels, intra_cluster_distance_type, inter_cluster_distance_type):
    """Dunn index: min inter-cluster distance / max intra-cluster diameter.

    The distance notions are selected by short keys; the helper functions are
    defined earlier in this file.  Clusters with fewer than two members
    contribute 0 to the intra-cluster diameters.
    """
    intra_fn = {
        'cmpl_dd': complete_diameter_distance,
        'avdd': average_diameter_distance,
        'cent_dd': centroid_diameter_distance
    }[intra_cluster_distance_type]
    inter_fn = {
        'sld': single_linkage_distance,
        'cmpl_ld': complete_linkage_distance,
        'avld': average_linkage_distance,
        'cent_ld': centroid_linkage_distance,
        'av_cent_ld': average_of_centroids_linkage_distance
    }[inter_cluster_distance_type]
    uniq = np.unique(labels)
    # Numerator: smallest separation over all unordered cluster pairs.
    separations = [
        inter_fn(X[np.where(labels == a)[0], :], X[np.where(labels == b)[0], :])
        for a, b in combinations(uniq, 2)
    ]
    # Denominator: largest diameter over all clusters (singletons count as 0).
    diameters = []
    for lab in uniq:
        members = X[np.where(labels == lab)[0], :]
        diameters.append(intra_fn(members) if members.shape[0] >= 2 else 0)
    return np.min(separations) / np.max(diameters)
# Evaluate the Dunn index for every combination of intra-/inter-cluster
# distance definitions on the synthetic three-cluster data.
intra_cluster_distance_type = ['cmpl_dd', 'avdd', 'cent_dd']
inter_cluster_distance_type = [
    'sld', 'cmpl_ld', 'avld', 'cent_ld', 'av_cent_ld']
for intra in intra_cluster_distance_type:
    for inter in inter_cluster_distance_type:
        # 'Value' fixes the original 'Valule' typo in the report line.
        print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra,
              "Inter Cluster Dist.:", '%-12s' % inter,
              "Dunn Index Value:", Dunn_index(X, labels, intra, inter))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.2780279425885912 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.5816104606529293 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.7627361333011984 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.7374610426286897 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.7486880384584048 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Valule: 0.8233205335652861 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 4.683602504961463 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Valule: 2.2586806002025015 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Valule: 2.1838338026300956 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 2.2170801594919096 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.6140262424157213 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 3.492995412900513 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Valule: 1.6845069510824404 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 1.6286867741323774 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.6534816562537682
import random
import pandas as pd

# Create an empty results table: one row will be added later for each
# (intra-cluster, inter-cluster) distance-type pairing.
Dunn_Index_result = pd.DataFrame(
    columns=['intra cluster', 'inter cluster', 'Dunn index']
)
Dunn_Index_result
| intra cluster | inter cluster | Dunn index |
|---|
# Quick sanity check of how many inter-cluster distance definitions exist (5).
len(inter_cluster_distance_type)
5
# Populate the results table: row index = 5*i + j keeps nested-loop order.
# BUG FIX: the original 'inter cluster' assignment ended with a stray
# trailing comma, storing a 1-tuple such as ('sld',) instead of the plain
# string (visible in the rendered table below).
for i, intra in enumerate(intra_cluster_distance_type):
    for j, inter in enumerate(inter_cluster_distance_type):
        row = len(inter_cluster_distance_type) * i + j
        Dunn_Index_result.loc[row, 'intra cluster'] = intra
        Dunn_Index_result.loc[row, 'inter cluster'] = inter
        Dunn_Index_result.loc[row, 'Dunn index'] = Dunn_index(
            X, labels, intra, inter)
Dunn_Index_result
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 0 | cmpl_dd | (sld,) | 0.278028 |
| 1 | cmpl_dd | (cmpl_ld,) | 1.58161 |
| 2 | cmpl_dd | (avld,) | 0.762736 |
| 3 | cmpl_dd | (cent_ld,) | 0.737461 |
| 4 | cmpl_dd | (av_cent_ld,) | 0.748688 |
| 5 | avdd | (sld,) | 0.823321 |
| 6 | avdd | (cmpl_ld,) | 4.683603 |
| 7 | avdd | (avld,) | 2.258681 |
| 8 | avdd | (cent_ld,) | 2.183834 |
| 9 | avdd | (av_cent_ld,) | 2.21708 |
| 10 | cent_dd | (sld,) | 0.614026 |
| 11 | cent_dd | (cmpl_ld,) | 3.492995 |
| 12 | cent_dd | (avld,) | 1.684507 |
| 13 | cent_dd | (cent_ld,) | 1.628687 |
| 14 | cent_dd | (av_cent_ld,) | 1.653482 |
# Show the pairing(s) that achieved the largest Dunn index.
best_value = Dunn_Index_result['Dunn index'].max()
Dunn_Index_result[Dunn_Index_result['Dunn index'] == best_value]
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 6 | avdd | (cmpl_ld,) | 4.683603 |
# Re-evaluate the Dunn index using the brute-force clustering labels.
X = data_iris_qaoa
labels = np.array(xbest_brute)
# Iterate the name lists directly instead of range(len(...)).
for intra in intra_cluster_distance_type:
    for inter in inter_cluster_distance_type:
        print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra,
              "Inter Cluster Dist.:", '%-12s' % inter,
              # fixed typo: "Valule" -> "Value"
              "Dunn Index Value:", Dunn_index(X, labels, intra, inter))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.3620139928982456 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.5707439215978107 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.8232589728489603 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.6917039944618047 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.7551335394782271 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Valule: 0.4897846758927012 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 2.125128634921179 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Valule: 1.1138233247959632 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.9358368001990114 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.021653425405049 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.39815239144344045 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.7275449595817183 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.9054415996268316 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.760754017713779 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.8305154786845887
# Build an empty table to collect Dunn-index results for the
# brute-force labels.
Dunn_Index_result_brute = pd.DataFrame(
    columns=['intra cluster', 'inter cluster', 'Dunn index']
)
Dunn_Index_result_brute
| intra cluster | inter cluster | Dunn index |
|---|
# Populate the brute-force results table; row index = 5*i + j keeps
# nested-loop order. Uses enumerate instead of range(len(...)).
for i, intra in enumerate(intra_cluster_distance_type):
    for j, inter in enumerate(inter_cluster_distance_type):
        row = len(inter_cluster_distance_type) * i + j
        Dunn_Index_result_brute.loc[row, 'intra cluster'] = intra
        Dunn_Index_result_brute.loc[row, 'inter cluster'] = inter
        Dunn_Index_result_brute.loc[row, 'Dunn index'] = Dunn_index(
            X, labels, intra, inter)
Dunn_Index_result_brute
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 0 | cmpl_dd | sld | 0.362014 |
| 1 | cmpl_dd | cmpl_ld | 1.570744 |
| 2 | cmpl_dd | avld | 0.823259 |
| 3 | cmpl_dd | cent_ld | 0.691704 |
| 4 | cmpl_dd | av_cent_ld | 0.755134 |
| 5 | avdd | sld | 0.489785 |
| 6 | avdd | cmpl_ld | 2.125129 |
| 7 | avdd | avld | 1.113823 |
| 8 | avdd | cent_ld | 0.935837 |
| 9 | avdd | av_cent_ld | 1.021653 |
| 10 | cent_dd | sld | 0.398152 |
| 11 | cent_dd | cmpl_ld | 1.727545 |
| 12 | cent_dd | avld | 0.905442 |
| 13 | cent_dd | cent_ld | 0.760754 |
| 14 | cent_dd | av_cent_ld | 0.830515 |
# Show the pairing(s) that achieved the largest Dunn index (brute force).
best_brute = Dunn_Index_result_brute['Dunn index'].max()
Dunn_Index_result_brute[Dunn_Index_result_brute['Dunn index'] == best_brute]
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 2.125129 |
# Re-evaluate the Dunn index using the QAOA clustering labels.
X = data_iris_qaoa
labels = np.array(xbest_qaoa)
# Iterate the name lists directly instead of range(len(...)).
for intra in intra_cluster_distance_type:
    for inter in inter_cluster_distance_type:
        print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra,
              "Inter Cluster Dist.:", '%-12s' % inter,
              # fixed typo: "Valule" -> "Value"
              "Dunn Index Value:", Dunn_index(X, labels, intra, inter))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.230472954834034 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 0.8636888499692471 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.5121246670623536 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.26807748430821293 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.38156219816433296 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Valule: 0.33645512124307064 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.2608530876950403 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Valule: 0.7476233689636662 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.3913519594974095 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.557022214331395 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.3073016314328899 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.151601465938044 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.6828425738656213 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.3574417151793882 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.5087568130215343
# Build an empty table to collect Dunn-index results for the QAOA labels.
Dunn_Index_result_qaoa = pd.DataFrame(
    columns=['intra cluster', 'inter cluster', 'Dunn index']
)
Dunn_Index_result_qaoa
| intra cluster | inter cluster | Dunn index |
|---|
# Populate the QAOA results table; row index = 5*i + j keeps
# nested-loop order. Uses enumerate instead of range(len(...)).
for i, intra in enumerate(intra_cluster_distance_type):
    for j, inter in enumerate(inter_cluster_distance_type):
        row = len(inter_cluster_distance_type) * i + j
        Dunn_Index_result_qaoa.loc[row, 'intra cluster'] = intra
        Dunn_Index_result_qaoa.loc[row, 'inter cluster'] = inter
        Dunn_Index_result_qaoa.loc[row, 'Dunn index'] = Dunn_index(
            X, labels, intra, inter)
Dunn_Index_result_qaoa
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 0 | cmpl_dd | sld | 0.230473 |
| 1 | cmpl_dd | cmpl_ld | 0.863689 |
| 2 | cmpl_dd | avld | 0.512125 |
| 3 | cmpl_dd | cent_ld | 0.268077 |
| 4 | cmpl_dd | av_cent_ld | 0.381562 |
| 5 | avdd | sld | 0.336455 |
| 6 | avdd | cmpl_ld | 1.260853 |
| 7 | avdd | avld | 0.747623 |
| 8 | avdd | cent_ld | 0.391352 |
| 9 | avdd | av_cent_ld | 0.557022 |
| 10 | cent_dd | sld | 0.307302 |
| 11 | cent_dd | cmpl_ld | 1.151601 |
| 12 | cent_dd | avld | 0.682843 |
| 13 | cent_dd | cent_ld | 0.357442 |
| 14 | cent_dd | av_cent_ld | 0.508757 |
# Show the pairing(s) that achieved the largest Dunn index (QAOA).
best_qaoa = Dunn_Index_result_qaoa['Dunn index'].max()
Dunn_Index_result_qaoa[Dunn_Index_result_qaoa['Dunn index'] == best_qaoa]
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 1.260853 |
# Re-evaluate the Dunn index using the K-Means clustering labels.
X = data_iris_qaoa
labels = y_kmeans
# Iterate the name lists directly instead of range(len(...)).
for intra in intra_cluster_distance_type:
    for inter in inter_cluster_distance_type:
        print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra,
              "Inter Cluster Dist.:", '%-12s' % inter,
              # fixed typo: "Valule" -> "Value"
              "Dunn Index Value:", Dunn_index(X, labels, intra, inter))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.7599392072950275 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.6224114290107803 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Valule: 1.133292183837154 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 1.0861701170767721 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.1274019254921064 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Valule: 1.2151750370192227 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 2.5943047146180387 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Valule: 1.8121822880409244 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Valule: 1.7368321038810761 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.8027635150209436 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.9354067839788758 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.997021133444915 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Valule: 1.394965790441124 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 1.3369633864334904 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.3877154899401698
# Build an empty table to collect Dunn-index results for the K-Means labels.
Dunn_Index_result_kmeans = pd.DataFrame(
    columns=['intra cluster', 'inter cluster', 'Dunn index']
)
Dunn_Index_result_kmeans
| intra cluster | inter cluster | Dunn index |
|---|
# Populate the K-Means results table; row index = 5*i + j keeps
# nested-loop order. Uses enumerate instead of range(len(...)).
for i, intra in enumerate(intra_cluster_distance_type):
    for j, inter in enumerate(inter_cluster_distance_type):
        row = len(inter_cluster_distance_type) * i + j
        Dunn_Index_result_kmeans.loc[row, 'intra cluster'] = intra
        Dunn_Index_result_kmeans.loc[row, 'inter cluster'] = inter
        Dunn_Index_result_kmeans.loc[row, 'Dunn index'] = Dunn_index(
            X, labels, intra, inter)
Dunn_Index_result_kmeans
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 0 | cmpl_dd | sld | 0.759939 |
| 1 | cmpl_dd | cmpl_ld | 1.622411 |
| 2 | cmpl_dd | avld | 1.133292 |
| 3 | cmpl_dd | cent_ld | 1.08617 |
| 4 | cmpl_dd | av_cent_ld | 1.127402 |
| 5 | avdd | sld | 1.215175 |
| 6 | avdd | cmpl_ld | 2.594305 |
| 7 | avdd | avld | 1.812182 |
| 8 | avdd | cent_ld | 1.736832 |
| 9 | avdd | av_cent_ld | 1.802764 |
| 10 | cent_dd | sld | 0.935407 |
| 11 | cent_dd | cmpl_ld | 1.997021 |
| 12 | cent_dd | avld | 1.394966 |
| 13 | cent_dd | cent_ld | 1.336963 |
| 14 | cent_dd | av_cent_ld | 1.387715 |
# Show the pairing(s) that achieved the largest Dunn index (K-Means).
best_kmeans = Dunn_Index_result_kmeans['Dunn index'].max()
Dunn_Index_result_kmeans[Dunn_Index_result_kmeans['Dunn index'] == best_kmeans]
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 2.594305 |
# Re-evaluate the Dunn index using the ground-truth labels.
X = data_iris_qaoa
labels = data_iris_qaoa_label2
# Iterate the name lists directly instead of range(len(...)).
for intra in intra_cluster_distance_type:
    for inter in inter_cluster_distance_type:
        print("Dunn Index:", "Intra Cluster Dist.:", '%-10s' % intra,
              "Inter Cluster Dist.:", '%-12s' % inter,
              # fixed typo: "Valule" -> "Value"
              "Dunn Index Value:", Dunn_index(X, labels, intra, inter))
Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.3546103537603096 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.6102218391443723 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: avld Dunn Index Valule: 0.8321369993089068 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.6827373137019518 Dunn Index: Intra Cluster Dist.: cmpl_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.7542897177692112 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: sld Dunn Index Valule: 0.5298462144104747 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 2.405936365886254 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: avld Dunn Index Valule: 1.243349592811763 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: cent_ld Dunn Index Valule: 1.020121881004832 Dunn Index: Intra Cluster Dist.: avdd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 1.1270329455718626 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: sld Dunn Index Valule: 0.4293377071669016 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cmpl_ld Dunn Index Valule: 1.9495453111208467 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: avld Dunn Index Valule: 1.0074939649774532 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: cent_ld Dunn Index Valule: 0.8266111515181995 Dunn Index: Intra Cluster Dist.: cent_dd Inter Cluster Dist.: av_cent_ld Dunn Index Valule: 0.9132418569636513
# Build an empty table to collect Dunn-index results for the true labels.
Dunn_Index_result_truelabel = pd.DataFrame(
    columns=['intra cluster', 'inter cluster', 'Dunn index']
)
Dunn_Index_result_truelabel
| intra cluster | inter cluster | Dunn index |
|---|
# Populate the true-label results table; row index = 5*i + j keeps
# nested-loop order. Uses enumerate instead of range(len(...)).
for i, intra in enumerate(intra_cluster_distance_type):
    for j, inter in enumerate(inter_cluster_distance_type):
        row = len(inter_cluster_distance_type) * i + j
        Dunn_Index_result_truelabel.loc[row, 'intra cluster'] = intra
        Dunn_Index_result_truelabel.loc[row, 'inter cluster'] = inter
        Dunn_Index_result_truelabel.loc[row, 'Dunn index'] = Dunn_index(
            X, labels, intra, inter)
Dunn_Index_result_truelabel
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 0 | cmpl_dd | sld | 0.35461 |
| 1 | cmpl_dd | cmpl_ld | 1.610222 |
| 2 | cmpl_dd | avld | 0.832137 |
| 3 | cmpl_dd | cent_ld | 0.682737 |
| 4 | cmpl_dd | av_cent_ld | 0.75429 |
| 5 | avdd | sld | 0.529846 |
| 6 | avdd | cmpl_ld | 2.405936 |
| 7 | avdd | avld | 1.24335 |
| 8 | avdd | cent_ld | 1.020122 |
| 9 | avdd | av_cent_ld | 1.127033 |
| 10 | cent_dd | sld | 0.429338 |
| 11 | cent_dd | cmpl_ld | 1.949545 |
| 12 | cent_dd | avld | 1.007494 |
| 13 | cent_dd | cent_ld | 0.826611 |
| 14 | cent_dd | av_cent_ld | 0.913242 |
# Show the pairing(s) that achieved the largest Dunn index (true labels).
best_true = Dunn_Index_result_truelabel['Dunn index'].max()
Dunn_Index_result_truelabel[Dunn_Index_result_truelabel['Dunn index'] == best_true]
| intra cluster | inter cluster | Dunn index | |
|---|---|---|---|
| 6 | avdd | cmpl_ld | 2.405936 |
# Combine the four experiments into one side-by-side comparison table.
# .copy() makes an independent frame: without it the four column
# assignments below write onto a slice of Dunn_Index_result_brute,
# triggering pandas' SettingWithCopy ambiguity.
Dunn_Index_results_combined = Dunn_Index_result_brute[[
    'intra cluster', 'inter cluster']].copy()
Dunn_Index_results_combined['Dunn index - Brute'] = Dunn_Index_result_brute['Dunn index']
Dunn_Index_results_combined['Dunn index - QAOA'] = Dunn_Index_result_qaoa['Dunn index']
Dunn_Index_results_combined['Dunn index - K-Means'] = Dunn_Index_result_kmeans['Dunn index']
Dunn_Index_results_combined['Dunn index - True label'] = Dunn_Index_result_truelabel['Dunn index']
Dunn_Index_results_combined
| intra cluster | inter cluster | Dunn index - Brute | Dunn index - QAOA | Dunn index - K-Means | Dunn index - True label | |
|---|---|---|---|---|---|---|
| 0 | cmpl_dd | sld | 0.362014 | 0.230473 | 0.759939 | 0.35461 |
| 1 | cmpl_dd | cmpl_ld | 1.570744 | 0.863689 | 1.622411 | 1.610222 |
| 2 | cmpl_dd | avld | 0.823259 | 0.512125 | 1.133292 | 0.832137 |
| 3 | cmpl_dd | cent_ld | 0.691704 | 0.268077 | 1.08617 | 0.682737 |
| 4 | cmpl_dd | av_cent_ld | 0.755134 | 0.381562 | 1.127402 | 0.75429 |
| 5 | avdd | sld | 0.489785 | 0.336455 | 1.215175 | 0.529846 |
| 6 | avdd | cmpl_ld | 2.125129 | 1.260853 | 2.594305 | 2.405936 |
| 7 | avdd | avld | 1.113823 | 0.747623 | 1.812182 | 1.24335 |
| 8 | avdd | cent_ld | 0.935837 | 0.391352 | 1.736832 | 1.020122 |
| 9 | avdd | av_cent_ld | 1.021653 | 0.557022 | 1.802764 | 1.127033 |
| 10 | cent_dd | sld | 0.398152 | 0.307302 | 0.935407 | 0.429338 |
| 11 | cent_dd | cmpl_ld | 1.727545 | 1.151601 | 1.997021 | 1.949545 |
| 12 | cent_dd | avld | 0.905442 | 0.682843 | 1.394966 | 1.007494 |
| 13 | cent_dd | cent_ld | 0.760754 | 0.357442 | 1.336963 | 0.826611 |
| 14 | cent_dd | av_cent_ld | 0.830515 | 0.508757 | 1.387715 | 0.913242 |